`%>%` <- magrittr::`%>%`

Definition of delta statistic

# Delta statistic for two samples `x` and `y`.
#
# The values are pooled (all of `x` first, then all of `y`), each value is
# tagged with the sample it came from ("X" or "Y"), and the pooled values and
# tags are handed to rocauc::auc_by() with "Y" as its third argument.
# Returns that result minus 0.5.
stat_auc <- function(x, y) {
  origin <- rep(c("X", "Y"), times = c(length(x), length(y)))
  pooled <- c(x, y)
  rocauc::auc_by(pooled, origin, "Y") - 0.5
}
# Apply the two-argument function `stat` to element `var` of `dx` and of `dy`.
#
# dx, dy: objects that support `[[var]]` extraction (e.g. data frames, lists).
# var:    name or index of the element to extract from both.
# stat:   function called as stat(dx[[var]], dy[[var]]).
# Returns whatever `stat` returns.
apply_stat <- function(dx, dy, var, stat) {
  stat(dx[[var]], dy[[var]])
}

Plot of English statistics for segments attested with frequency >= 5

## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Number of potential consonants by language

# Column totals of `added_consonants` after dropping the `labels`, `freq` and
# `scores` columns. The remaining columns are treated as one per language:
# the result is a tibble with one row per remaining column, holding the column
# total in `nsegs` and the column name in `language`.
ncons_by_lang <- added_consonants %>%
  # select() with negated names replaces the superseded select_at(vars(...)).
  dplyr::select(-labels, -freq, -scores) %>%
  # colSums() totals each column directly, instead of apply(2, sum) on a
  # data frame; it yields a numeric vector named by column.
  colSums() %>%
  (function(totals) tibble::tibble(nsegs = totals, language = names(totals)))

Top N languages

# Number of rows to show from the top of the ranking.
N_LANG <- 10

# Sort `ncons_by_lang` by `nsegs`, largest first, and print the first N_LANG
# rows.
ncons_by_lang %>%
  dplyr::arrange(dplyr::desc(nsegs)) %>%
  head(N_LANG) %>%
  print()
## # A tibble: 10 x 2
##    nsegs language
##    <dbl> <chr>   
##  1    61 uby     
##  2    51 nmn     
##  3    49 ady     
##  4    38 gdo     
##  5    38 mrt     
##  6    37 sna     
##  7    36 tkr     
##  8    35 lez     
##  9    33 ven     
## 10    31 kbd
## [[1]]
##            stat_econ  stat_loc  stat_glob
## stat_econ  1.0000000 0.0764968 -0.4032222
## stat_loc   0.0764968 1.0000000  0.1912467
## stat_glob -0.4032222 0.1912467  1.0000000
## 
## [[2]]
##             stat_econ    stat_loc  stat_glob
## stat_econ  1.00000000 -0.09232765 -0.3826825
## stat_loc  -0.09232765  1.00000000  0.2879037
## stat_glob -0.38268249  0.28790375  1.0000000
## 
## [[3]]
##             stat_econ   stat_loc  stat_glob
## stat_econ  1.00000000 0.09481393 -0.4019823
## stat_loc   0.09481393 1.00000000  0.2743994
## stat_glob -0.40198228 0.27439944  1.0000000
## 
## [[4]]
##            stat_econ   stat_loc  stat_glob
## stat_econ  1.0000000  0.0540669 -0.4253403
## stat_loc   0.0540669  1.0000000 -0.2374706
## stat_glob -0.4253403 -0.2374706  1.0000000
## 
## [[5]]
##             stat_econ    stat_loc  stat_glob
## stat_econ  1.00000000 -0.03807736 -0.4469134
## stat_loc  -0.03807736  1.00000000  0.1637639
## stat_glob -0.44691344  0.16376394  1.0000000
## 
## [[6]]
##            stat_econ  stat_loc  stat_glob
## stat_econ  1.0000000 0.2734516 -0.2668327
## stat_loc   0.2734516 1.0000000  0.3819063
## stat_glob -0.2668327 0.3819063  1.0000000
## 
## [[7]]
##             stat_econ  stat_loc   stat_glob
## stat_econ  1.00000000 0.2729433 -0.02274032
## stat_loc   0.27294326 1.0000000  0.12066396
## stat_glob -0.02274032 0.1206640  1.00000000
## 
## [[8]]
##            stat_econ   stat_loc  stat_glob
## stat_econ  1.0000000  0.2886127 -0.4142538
## stat_loc   0.2886127  1.0000000 -0.1251715
## stat_glob -0.4142538 -0.1251715  1.0000000
## 
## [[9]]
##            stat_econ   stat_loc  stat_glob
## stat_econ  1.0000000  0.1882623 -0.2694531
## stat_loc   0.1882623  1.0000000 -0.0700509
## stat_glob -0.2694531 -0.0700509  1.0000000
## 
## [[10]]
##            stat_econ  stat_loc  stat_glob
## stat_econ  1.0000000 0.1083463 -0.3616778
## stat_loc   0.1083463 1.0000000  0.0190826
## stat_glob -0.3616778 0.0190826  1.0000000

Merge the five “common” languages that would work OK (Hindi, Malayalam, Venda, Ndebele, and Kabardian)

# 3D plot of the rows of `stats` flagged (== 1) in at least one of the
# `hin`, `mal`, `ven`, `nbl` or `kbd` columns. Each row is drawn as its
# `labels` text at (stat_econ, stat_loc, stat_glob) with colour mapped to
# log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(
    hin == 1 | mal == 1 | ven == 1 | nbl == 1 | kbd == 1
  ) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi by itself

# 3D plot of the rows of `stats` where `hin == 1`. Each row is drawn as its
# `labels` text at (stat_econ, stat_loc, stat_glob) with colour mapped to
# log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(hin == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays
# Save the `labels` column of the full (unfiltered) `stats` table to
# "labels.csv" in the current working directory.
write.csv(stats$labels, file = "labels.csv")

Kabardian by itself

# 3D plot of the rows of `stats` where `kbd == 1`. Each row is drawn as its
# `labels` text at (stat_econ, stat_loc, stat_glob) with colour mapped to
# log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(kbd == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Kabardian prime by itself

# 3D plot of the rows of `stats` where `kbd_prime == 1`. Each row is drawn as
# its `labels` text at (stat_econ, stat_loc, stat_glob) with colour mapped to
# log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(kbd_prime == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Kabardian prime and Hindi

# 3D plot of the rows of `stats` where `kbd_prime == 1` or `hin == 1`. Each
# row is drawn as its `labels` text at (stat_econ, stat_loc, stat_glob) with
# colour mapped to log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(kbd_prime == 1 | hin == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi and Kabardian

# 3D plot of the rows of `stats` where `hin == 1` or `kbd == 1`. Each row is
# drawn as its `labels` text at (stat_econ, stat_loc, stat_glob) with colour
# mapped to log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(hin == 1 | kbd == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi and Malayalam

# 3D plot of the rows of `stats` where `hin == 1` or `mal == 1`. Each row is
# drawn as its `labels` text at (stat_econ, stat_loc, stat_glob) with colour
# mapped to log(freq); a marker trace is then added to the same plot.
stats %>%
  dplyr::filter(hin == 1 | mal == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi, Malayalam, and Georgian

# 3D plot of the rows of `stats` where `hin == 1`, `kat == 1` or `mal == 1`.
# Each row is drawn as its `labels` text at (stat_econ, stat_loc, stat_glob)
# with colour mapped to log(freq); a marker trace is then added to the same
# plot.
stats %>%
  dplyr::filter(hin == 1 | kat == 1 | mal == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays